#!/usr/bin/env python3

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


from contextlib import contextmanager
from pathlib import Path
import os
import subprocess
import shlex
import filecmp
import re
import itertools
import argparse

# Directory two levels above this file (the parent of the directory holding this script);
# every relative path used by the checks below is resolved against it.
project_root = Path(__file__).resolve().parent.parent
# Base Maven command line for the license plugin. It is run as-is for the storm-dist report and
# extended with extra -D options when DEPENDENCY-LICENSES is regenerated into target.
update_dependency_licenses_cmd = (
    'mvn license:aggregate-add-third-party@generate-and-check-licenses'
    ' -Dlicense.skipAggregateAddThirdParty=false -B'
)


@contextmanager
def cd(newdir):
    """
    Context manager that temporarily changes the current working directory.

    :param newdir: directory to switch to, as a ``pathlib.Path`` or a plain string; a leading ``~`` is expanded
    :return: a context manager; the previous working directory is restored on exit, even if the body raises
    """
    prevdir = Path.cwd()
    # Wrapping in Path() lets callers pass a str as well as a Path (str has no expanduser()).
    os.chdir(Path(newdir).expanduser())
    try:
        yield
    finally:
        os.chdir(prevdir)


def generate_dependency_licenses():
    """
    Regenerate DEPENDENCY-LICENSES into the target directory.

    Runs the license plugin command with the output file name and output directory overridden, so the
    committed DEPENDENCY-LICENSES is not modified. Raises ``subprocess.CalledProcessError`` if Maven
    exits with a non-zero status.
    """
    print('Generating DEPENDENCY-LICENSES')
    # Same plugin invocation as the base command, redirected to target/DEPENDENCY-LICENSES.
    maven_cmd = ''.join((
        update_dependency_licenses_cmd,
        ' -Dlicense.thirdPartyFilename=DEPENDENCY-LICENSES',
        ' -Dlicense.outputDirectory=target',
    ))
    maven_argv = shlex.split(maven_cmd)
    subprocess.check_call(maven_argv)
    print('Done generating DEPENDENCY-LICENSES')


def print_file_contents(msg, file1, file2, show_summary_diff=True, show_file_contents=True):
    """
    Print contents of the files. Used for dumping the actual and expected DEPENDENCY-LICENSES files.

    The summary is one-directional: it lists the distinct non-blank lines of ``file1`` that do not occur
    anywhere in ``file2``, in the order they first appear in ``file1``.

    :param msg: message to print about the files
    :param file1: original file (str or path-like)
    :param file2: new file (str or path-like)
    :param show_summary_diff: (optional, default True): if true, then print the summary of differences in the files
    :param show_file_contents: (optional, default True): if true, then print the contents of the files
    :return: None
    """
    # Normalise to str so that Path arguments can be concatenated into the banners below.
    f_names = [str(file1), str(file2)]
    name1, name2 = f_names
    print('*' * 80)
    print('*' * 30 + msg + ' ' + name1 + ',' + name2 + '*' * 30)

    if show_summary_diff:
        with open(name1, 'r') as f1, open(name2, 'r') as f2:
            lines_in_file2 = set(f2)
            # dict.fromkeys() drops duplicate lines while keeping first-seen order, so the
            # report is stable between runs (iterating a set of str is not).
            diff = [line for line in dict.fromkeys(f1)
                    if line != '\n' and line not in lines_in_file2]
        print(f'***** Difference between file {name1} and {name2} *******')
        for line in diff:
            print(line)
        print('*' * 80)
    if show_file_contents:
        print('*' * 80)
        for i, f_name in enumerate(f_names):
            print('*' * 30 + ' Start of file ' + f_name + ' ' + '*' * 30)
            print('(' + str(i) + ') File ' + f_name + ' content is:')
            # Every line is indented with a tab; the with-block closes the handle promptly.
            with open(f_name) as f:
                print('\t' + '\t'.join(f))
            print('*' * 30 + ' End of file ' + f_name + ' ' + '*' * 30)
        print('*' * 80)


def check_dependency_licenses():
    """
    Verify that the committed DEPENDENCY-LICENSES matches the regenerated copy under target.

    Both paths are relative to the current working directory and are compared by content.
    On a mismatch an explanation and a dump of both files are printed.

    :return: True when the two files are identical, False otherwise
    """
    print('Checking DEPENDENCY-LICENSES')
    committed = Path('DEPENDENCY-LICENSES')
    regenerated = Path('target') / 'DEPENDENCY-LICENSES'
    # shallow=False forces a byte-by-byte comparison instead of trusting os.stat() signatures.
    if filecmp.cmp(committed, regenerated, shallow=False):
        return True

    print("DEPENDENCY-LICENSES and target/DEPENDENCY-LICENSES are different. "
          f"Please update DEPENDENCY-LICENSES by running '{update_dependency_licenses_cmd}' in the project root")
    print_file_contents('Actual is different from expected', 'DEPENDENCY-LICENSES', 'target/DEPENDENCY-LICENSES')
    return False


def build_storm():
    """
    Build the project with ``mvn clean install`` in the current working directory.

    The build runs in batch mode (-B) with tests, checkstyle and pmd skipped.
    Raises ``subprocess.CalledProcessError`` if Maven exits with a non-zero status.
    """
    build_cmd = 'mvn clean install -B -DskipTests -Dcheckstyle.skip -Dpmd.skip'
    print("Building Storm")
    build_argv = shlex.split(build_cmd)
    subprocess.check_call(build_argv)
    print("Done building Storm")


def extract_license_report_maven_coordinates(lines):
    """
    Collect the Maven coordinates mentioned in license report lines.

    :param lines: iterable of text lines (a list or a lazy iterator both work)
    :return: set containing the text captured before " - " inside the parentheses of every matching line;
        lines that do not match are ignored
    """
    # Lines like " * Checker Qual (org.checkerframework:checker-qual:2.5.2 - https://checkerframework.org)"
    entry_pattern = re.compile(r'\s+\*.*\((?P<gav>.*) \- .*\).*')
    matches = (entry_pattern.match(line) for line in lines)
    return {match.group('gav') for match in matches if match is not None}


def parse_license_binary_dependencies_coordinate_set():
    """
    Read LICENSE-binary from the project root and return the set of dependency coordinates it lists.

    Only the part of the file starting at the "END OF SOURCE NOTICES" separator line is scanned;
    everything before that line is skipped.
    """
    section_marker = '----------------------------END OF SOURCE NOTICES -------------------------------------------'

    def precedes_binary_section(line):
        # True for every line until the separator has been seen.
        return section_marker not in line

    all_lines = read_lines(project_root / 'LICENSE-binary')
    binary_section_lines = itertools.dropwhile(precedes_binary_section, all_lines)
    return extract_license_report_maven_coordinates(binary_section_lines)


def extract_dependency_list_maven_coordinates(lines):
    """
    Collect "group:artifact:version" coordinates from dependency list lines.

    The packaging type and, when present, the classifier are dropped from the result.

    :param lines: iterable of text lines; each dependency line must start with whitespace
    :return: set of "group:artifact:version" strings; lines that do not match are ignored
    """
    # Lines like "   com.google.code.findbugs:jsr305:jar:3.0.2 -- module jsr305 (auto)"
    # or, with a classifier, "   io.netty:netty-transport-native-epoll:jar:linux-x86_64:4.1.30.Final".
    # Segments exclude ':' so that a five-part line cannot fold the artifact into the group.
    dependency_pattern = re.compile(
        r'\s+(?P<group>[^:\s]+):(?P<artifact>[^:\s]+):(?P<type>[^:\s]+)'
        r'(?::(?P<classifier>[^:\s]+))?:(?P<version>[^:\s]+)')
    matches = (dependency_pattern.match(line) for line in lines)
    return {
        ':'.join(match.group('group', 'artifact', 'version'))
        for match in matches
        if match is not None
    }


def read_lines(path):
    """
    Return the lines of a text file as a list, each keeping its trailing newline.

    :param path: file to read (anything accepted by ``open``)
    """
    with open(path) as handle:
        # Iterating a text file yields the same line strings as readlines().
        return list(handle)


def generate_storm_dist_dependencies_coordinate_set():
    """
    Build the set of dependency coordinates for storm-dist/binary plus storm-shaded-deps.

    The storm-dist/binary coordinates are read from its already generated THIRD-PARTY.txt. The
    storm-shaded-deps coordinates come from running ``mvn dependency:list`` in that module; entries
    containing "org.apache.storm:" are left out.

    :return: set of coordinate strings from both sources
    """
    third_party_report = project_root.joinpath(
        'storm-dist', 'binary', 'target', 'generated-sources', 'license', 'THIRD-PARTY.txt')
    coordinates = extract_license_report_maven_coordinates(read_lines(third_party_report))

    # Add dependencies from storm-shaded-deps
    shaded_deps_dir = project_root / 'storm-shaded-deps'
    with cd(shaded_deps_dir):
        print("Generating dependency list for storm-shaded-deps")
        list_argv = shlex.split(
            'mvn dependency:list -DoutputFile=target/deps-list -Dmdep.outputScope=false -DincludeScope=compile -B')
        subprocess.check_call(list_argv)
        print("Done generating dependency list for storm-shaded-deps")
    listed_coordinates = extract_dependency_list_maven_coordinates(
        read_lines(shaded_deps_dir / 'target' / 'deps-list'))
    # Keep only the entries that are not Storm's own modules.
    shaded_dep_coordinates = {
        coordinate for coordinate in listed_coordinates if 'org.apache.storm:' not in coordinate
    }
    print('The storm-shaded-deps dependencies that are included when distributing storm-dist/binary are ' + str(shaded_dep_coordinates))
    print('')
    coordinates |= shaded_dep_coordinates

    return coordinates


def generate_storm_dist_license_report():
    """
    Run the license plugin command inside storm-dist/binary to produce that module's license report.

    Raises ``subprocess.CalledProcessError`` if Maven exits with a non-zero status.
    """
    report_argv = shlex.split(update_dependency_licenses_cmd)
    module_dir = project_root / 'storm-dist' / 'binary'
    with cd(module_dir):
        print('')
        print('Generating storm-dist license report')
        subprocess.check_call(report_argv)
        print('Done generating storm-dist license report')


def make_license_binary_checker():
    """
    Compare the dependencies listed in LICENSE-binary with the ones in the storm-dist/binary report.

    The comparison is done immediately; the findings are reported later, when the returned function
    is called.

    :return: a zero-argument function that prints any discrepancies and returns True when both
        lists agree, False otherwise
    """
    print('Checking LICENSE-binary')

    listed_in_license = parse_license_binary_dependencies_coordinate_set()
    actually_distributed = generate_storm_dist_dependencies_coordinate_set()
    unused_in_license = listed_in_license - actually_distributed
    absent_from_license = actually_distributed - listed_in_license
    print('Done checking LICENSE-binary')

    def check_for_errors():
        # Report entries in LICENSE-binary that no longer correspond to a distributed dependency.
        if unused_in_license:
            print('Dependencies in LICENSE-binary that appear unused: ')
            print('\n'.join(sorted(unused_in_license)))
        print('')
        # Report distributed dependencies that LICENSE-binary does not mention.
        if absent_from_license:
            print('Dependencies missing from LICENSE-binary: ')
            print('\n'.join(sorted(absent_from_license)))
        license_is_accurate = not absent_from_license and not unused_in_license
        if not license_is_accurate:
            print('LICENSE-binary needs to be updated. Please remove any unnecessary dependencies from LICENSE-binary, '
                  'and add any that are missing. You can copy any missing dependencies from DEPENDENCY-LICENSES')
        return license_is_accurate

    return check_for_errors


def main():
    """
    Command-line entry point: regenerate the license reports and validate the committed license files.

    Unless --skip-build-storm is given, Storm is built first. Both checks are always run, so that
    every problem is reported in a single pass.

    :return: process exit status, 0 when all license files are up to date and 1 otherwise
    """
    parser = argparse.ArgumentParser(description='Validate that the Storm license files are up to date (excluding NOTICE-binary and the licenses/ directory)')
    parser.add_argument('--skip-build-storm', action='store_true', help='set to skip building Storm')
    args = parser.parse_args()

    with cd(project_root):
        if not args.skip_build_storm:
            build_storm()
        generate_dependency_licenses()
        generate_storm_dist_license_report()
        license_binary_checker = make_license_binary_checker()
        # Evaluate both checks before combining them; short-circuiting would hide the second report.
        dependency_licenses_ok = check_dependency_licenses()
        license_binary_ok = license_binary_checker()
        if not (dependency_licenses_ok and license_binary_ok):
            print('Some license files are not up to date, see above for the relevant error message')
            return 1
        print('License files are up to date')
        return 0


if __name__ == '__main__':
    # SystemExit is always available, unlike the exit() helper that the site module injects.
    raise SystemExit(main())
